Data Import
# Load the national case counts; the first CSV row holds the column names.
df_country <- read.csv(file = "./cases_malaysia.csv", header = TRUE)

# Parse the YYYY-MM-DD date strings into Date values, then put the rows in
# chronological order.
df_country[["date"]] <- as.Date(df_country[["date"]], format = "%Y-%m-%d")
df_country <- df_country[order(df_country[["date"]]), ]

# Keep only the columns used by the model and the charts.
df_data <- df_country[, c("date", "cases_new", "cases_active")]
Linear Model Training
# Fraction of rows assigned to the training set; the remainder is held out.
split_ratio <- 0.7

# Fix the RNG seed so the random partition below is reproducible.
set.seed(168)

# Row indices of the training sample, drawn on the cases_new column.
linear_index <- createDataPartition(
  y = df_data$cases_new,
  p = split_ratio,
  list = FALSE
)
linear_train <- df_data[linear_index, ]
linear_test <- df_data[-linear_index, ]

# Simple linear regression of daily new cases on active cases.
linear_model <- lm(formula = cases_new ~ cases_active, data = linear_train)
summary(linear_model)
##
## Call:
## lm(formula = cases_new ~ cases_active, data = linear_train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3742.5 -215.4 -127.7 221.8 4264.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.286e+02 5.175e+01 2.485 0.0133 *
## cases_active 8.244e-02 6.365e-04 129.506 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 948.8 on 494 degrees of freedom
## Multiple R-squared: 0.9714, Adjusted R-squared: 0.9713
## F-statistic: 1.677e+04 on 1 and 494 DF, p-value: < 2.2e-16
# Disabled alternative, kept for reference: fit cases_new against date through
# train() with method = "lm". It depends on linear_train_control, which is not
# defined in the visible part of this script.
# linear_model <- train(cases_new~date, data=linear_train, method="lm", trControl=linear_train_control)
# Draw the diagnostic plots for the fitted lm object.
plot(linear_model)




# Predict daily new cases for the held-out rows with the fitted model.
linear_prediction <- predict(linear_model, newdata = linear_test)

# Pair every observed value with its prediction and print the whole table.
linear_compare <- data.frame(
  actual = linear_test$cases_new,
  predicted = linear_prediction
)
linear_compare
## actual predicted
## 1 4 128.9024
## 5 3 129.1497
## 8 0 129.2322
## 10 0 129.2322
## 14 1 129.7268
## 16 1 129.7268
## 17 1 129.8092
## 19 0 129.8092
## 21 1 129.5619
## 26 0 128.9849
## 28 0 128.9849
## 33 0 128.7376
## 36 0 128.8200
## 39 7 129.7268
## 41 5 131.2106
## 44 6 134.6729
## 45 18 136.0743
## 46 12 137.0635
## 51 190 160.3927
## 52 125 170.6972
## 61 172 258.4908
## 72 179 342.5749
## 73 131 333.7543
## 74 170 341.0910
## 76 109 339.0302
## 78 184 331.5285
## 80 134 331.5285
## 82 85 321.5539
## 86 84 302.0167
## 87 36 296.8233
## 91 88 287.9202
## 94 40 274.0711
## 96 94 273.4940
## 105 68 257.5016
## 108 70 252.5555
## 111 40 240.3550
## 114 22 228.4019
## 115 47 228.5668
## 117 31 226.6708
## 120 48 224.1153
## 124 15 245.7958
## 140 33 220.4057
## 141 43 212.1621
## 143 41 208.8647
## 146 14 162.2887
## 152 6 148.7693
## 166 3 134.4256
## 167 6 133.8485
## 174 3 135.0026
## 175 18 136.1567
## 179 15 139.3717
## 188 8 146.9558
## 189 12 145.7192
## 196 25 145.9665
## 200 9 142.5043
## 204 26 146.6260
## 205 25 146.3787
## 207 7 144.4003
## 212 10 143.6584
## 213 7 143.7408
## 216 5 143.1637
## 217 10 141.0204
## 219 17 141.6799
## 220 6 141.6799
## 224 11 142.1745
## 226 6 141.2677
## 227 62 145.6368
## 229 24 154.2925
## 230 45 156.0237
## 232 58 173.9946
## 241 57 190.3992
## 243 147 191.9655
## 245 110 199.2198
## 247 150 206.7214
## 250 89 220.9003
## 255 432 320.3998
## 272 847 801.6574
## 278 801 962.4063
## 280 799 984.2517
## 281 659 955.7291
## 282 957 954.1628
## 285 1032 979.3056
## 288 1168 1088.6149
## 290 972 1059.6801
## 291 869 1071.3859
## 292 822 1075.5901
## 295 1114 1127.1946
## 297 1103 1166.0217
## 299 660 1183.6629
## 301 958 1217.2140
## 306 970 1196.6052
## 311 1212 998.7603
## 315 1141 1017.6380
## 319 1012 1022.5017
## 323 1937 1260.6575
## 325 1371 1267.5821
## 327 1295 1343.6699
## 329 1683 1375.4899
## 331 1340 1410.1128
## 332 2018 1486.9425
## 335 1581 1673.8235
## 337 2335 1777.2799
## 341 1870 1984.7697
## 350 2643 2195.8866
## 351 2451 2281.7843
## 353 2232 2476.9088
## 355 2985 2791.1524
## 356 3337 2924.2030
## 357 3211 3027.9067
## 358 4029 3181.7311
## 362 4008 3506.5264
## 363 3170 3561.3459
## 370 4094 3677.1676
## 371 5725 3865.3675
## 373 5298 4085.3051
## 380 3731 4340.5250
## 383 3288 4418.7561
## 388 2176 4001.9629
## 393 2461 2960.3922
## 401 2437 2281.2897
## 402 1828 2226.7175
## 404 1745 2101.6630
## 405 2063 2030.6037
## 407 1680 1865.6506
## 410 1280 1663.3542
## 413 1575 1529.2319
## 425 1268 1328.9140
## 427 1275 1298.5777
## 429 1302 1307.2335
## 431 1133 1292.3127
## 436 1349 1317.7027
## 440 1285 1292.8897
## 441 1854 1342.3509
## 442 1510 1363.7017
## 444 1317 1427.2593
## 446 1889 1499.0605
## 451 2078 1813.9636
## 460 3142 2323.4141
## 462 3788 2529.0079
## 468 3551 2900.5441
## 470 4519 3130.4563
## 480 4865 3803.6235
## 482 6806 4244.6526
## 493 6824 6648.1380
## 494 7105 6724.6380
## 496 8209 6962.7938
## 497 7748 7048.6090
## 502 6239 6819.1089
## 507 4949 5996.8986
## 509 5150 5695.0204
## 510 5738 5540.6189
## 512 5911 5410.7008
## 516 5244 5101.8979
## 517 5841 5130.6679
## 522 6437 5266.6038
## 525 6982 5525.7806
## 527 6045 5651.3296
## 531 8868 6433.3114
## 535 8574 7559.6258
## 539 12541 9410.5469
## 541 10710 10262.1866
## 544 11985 11304.4168
## 554 17786 15480.2624
## 555 17150 15933.4920
## 559 20596 17688.5408
## 560 20889 18029.5759
## 561 19257 18243.9903
## 570 19740 20289.8711
## 574 23564 20953.3933
## 578 20837 21375.8745
## 583 20579 21837.0179
## 584 19268 21650.5491
## 587 20988 21393.7630
## 591 17352 20610.7095
## 593 19733 20297.6200
## 598 16073 18678.8368
## 600 19495 18509.5146
## 602 17577 18188.9235
## 605 14345 17107.9486
## 606 15759 17021.9685
## 613 11332 14459.7127
## 619 8075 11864.3178
## 621 9380 10986.1339
## 624 8743 10004.9058
## 625 7373 9702.5329
## 628 7950 8833.5818
## 630 7420 8236.9147
## 636 6210 6839.2231
## 637 6630 6750.9349
## 639 5666 6356.7290
## 642 6148 6014.5398
## 654 4543 5197.6054
## 656 6243 5294.4669
## 658 6517 5408.2277
## 661 5143 5548.8625
## 663 6288 5617.9433
## 666 5859 5841.7553
## 667 4854 5782.6491
## 671 6144 5780.6707
## 675 4087 5512.7558
## 676 4879 5486.2116
## 677 5439 5371.2143
## 681 4298 5225.3037
## 684 5020 5134.2950
## 688 3490 4969.0121
## 689 3504 4892.5121
## 695 3108 4524.7680
## 697 3140 4324.6149
## 701 3160 3879.3816
## 705 3683 3558.7080
## 706 3997 3558.7904
## 708 3386 3511.2252
# Hold-out accuracy of the linear model: R2, RMSE and MAE of the predictions
# against the observed new cases. local() keeps the helper variable out of
# the global environment; the resulting one-row data frame is still printed.
local({
  observed <- linear_test$cases_new
  data.frame(
    R2 = R2(linear_prediction, observed),
    RMSE = RMSE(linear_prediction, observed),
    MAE = MAE(linear_prediction, observed)
  )
})
## R2 RMSE MAE
## 1 0.9721543 906.3865 545.7654
# Chart init
# Predictions are paired with the dates of the held-out rows so that all three
# series are drawn against the same date axis.
df_predicted <- data.frame(
  date = linear_test$date,
  cases_new = linear_prediction
)
df_actual <- linear_test
df_train <- linear_train
lm_chart <- plot_ly()

# Predicted Data: model output for the test rows.
lm_chart <- lm_chart %>%
  add_trace(
    x = df_predicted[["date"]], y = df_predicted[["cases_new"]],
    name = "Predicted Data", # legend label previously misspelled "Predcited"
    type = "scatter",
    mode = "lines",
    line = list(color = "red", width = 3)
  )

# Test Data: observed values for the same rows.
lm_chart <- lm_chart %>%
  add_trace(
    x = df_actual[["date"]], y = df_actual[["cases_new"]],
    name = "Actual Data",
    type = "scatter",
    mode = "lines",
    line = list(color = "skyblue", width = 3)
  )

# Train Data: observed values the model was fitted on.
lm_chart <- lm_chart %>%
  add_trace(
    x = df_train[["date"]], y = df_train[["cases_new"]],
    name = "Train Data",
    type = "scatter",
    mode = "lines",
    line = list(color = "green", width = 2)
  )

# Set figure title, axis titles and styling in a single layout() call so each
# axis is configured in one place. Grid and zero lines are white; the colour
# is written as a full "#ffffff" instead of the mixed "#ffff" / "ffff"
# spellings used before.
lm_chart <- lm_chart %>%
  layout(
    title = "Model Comparison of Daily New Cases",
    plot_bgcolor = "#e5ecf6",
    xaxis = list(
      title = "Recorded Time",
      zerolinecolor = "#ffffff",
      zerolinewidth = 2,
      gridcolor = "#ffffff"
    ),
    yaxis = list(
      title = "Daily Count of New Cases",
      zerolinecolor = "#ffffff",
      zerolinewidth = 2,
      gridcolor = "#ffffff"
    )
  )
lm_chart